library(tidyverse)
library(WGCNA)
library(ggeasy)
library(ggpubr)
net_dir = "/pastel/projects/speakeasy_dlpfc/SpeakEasy_net_MF/"
expression_dir = "/pastel/projects/speakeasy_dlpfc/"
work_dir = "/pastel/Github_scripts/SpeakEasy_dlpfc/bulk_dlpfc/"
resource_dir = "/pastel/resources/20220203_snRNAseq_AMPAD/"
cell_dir = "/pastel/resources/cell_proportion_SN/"### Expression data
load(paste0(expression_dir, "exprdata_byregion.Rdata"))
expr_matx = as.data.frame(exprData_MF) # Residuals of the expression
colnames(expr_matx) = gsub("(.*)_(.*)", "\\2", colnames(expr_matx))
### Modules from SE
modules_file = read.table(paste0(net_dir, "geneBycluster.txt"), header = T)
modules_size = as.data.frame( table(modules_file$cluster_lv3))
colnames(modules_size) = c("module", "n_nodes")
### Eigengenes and average expression
load(paste0(net_dir, "lv3_moduleEigengenes.Rdata"))
mod_eigengene = lv3_moduleEigengenes$eigengenes
mod_average = lv3_moduleEigengenes$averageExpr
mod_average$projid = gsub("(.*)_(.*)", "\\2", rownames(mod_average)) #get the projid to match with phenotype data
rownames(mod_average) = mod_average$projid
mod_average$projid = NULL
### Phenotype
# load("/pastel/projects/spatial_t/pseudo_bulk/phenotypes.RData") # phenotypes
# phenotype_dt = phenotypes[match(rownames(mod_average), phenotypes$projid),]
# head(phenotype_dt[, c("projid", "cogng_demog_slope")])For each gene, we define a “fuzzy” measure of module membership by correlating its gene expression profile with the module eigengene of a given module. Highly connected intramodular hub genes tend to have high module membership values to the respective module. If the value is close to 1 or -1, the gene is highly correlated with the other genes inside the module.
ModuleMembership = as.data.frame(WGCNA::cor(t(expr_matx),
mod_average,
method = "pearson",
use="p")) # this function is faster than the R standand correlation one
### Get modules >= 30 nodes to show:
to_show = paste0("AE",as.character( modules_size$module[modules_size$n_nodes >= 30]))
mm_filt = ModuleMembership[, colnames(ModuleMembership) %in% to_show ]
# save(mod_average, ModuleMembership, mm_filt, file = paste0(work_dir, "Mod_membership_MF.Rdata"))
# createDT(ModuleMembership)Counts from the SN dataset.
# Major cell types
# cell_prop = read.delim2(paste0(resource_dir, "updated_annotations/cellprop_snRNAseq.txt"), header = T, colClasses = "character", stringsAsFactors = F, check.names = F)
# Sub cell types of SN
cell_prop = read.delim2(paste0(resource_dir, "updated_annotations/subcellprop_snRNAseq.txt"), header = T, colClasses = "character", stringsAsFactors = F, check.names = F)
# mjr_celltypes1 = names(cell_prop)[grep("Exc.", names(cell_prop))]
# mjr_celltypes2 = names(cell_prop)[grep("Inh.", names(cell_prop))]
# mjr_celltypes3 = names(cell_prop)[grep("Oli.", names(cell_prop))]
mjr_celltypes4 = names(cell_prop)[grep("Ast.", names(cell_prop))]
mjr_celltypes5 = names(cell_prop)[grep("Mic.", names(cell_prop))]
# mjr_celltypes6 = names(cell_prop)[grep("OPC.", names(cell_prop))]
# mjr_celltypes7 = names(cell_prop)[grep("End.", names(cell_prop))]
# mjr_celltypes = c(mjr_celltypes1, mjr_celltypes2, mjr_celltypes3, mjr_celltypes4, mjr_celltypes5, mjr_celltypes6, mjr_celltypes7)
glia = c(mjr_celltypes4, mjr_celltypes5)
cell_prop_f = cell_prop[, c("projid",glia) ]
cell_prop_f[,-1] <- sapply(cell_prop_f[,-1], as.numeric)The absolute value of the correlation between the gene and the trait. In this case, trait is cell counts.
Shinya: “I wonder if there is any connections between the gene membership of module 3 to the Ast and Mic cells.”
projid_in_common = cell_prop_f$projid[which(cell_prop_f$projid %in% colnames(expr_matx))]
cell_prop_f_in_common = cell_prop_f[match(projid_in_common,cell_prop_f$projid),]
expr_mf_m3 = expr_matx[modules_file$ensembl[modules_file$cluster_lv3 == 3], projid_in_common]
# dim(expr_mf_m3)
# dim(cell_prop_f_in_common)
GS_mf_m3 = as.data.frame(WGCNA::cor(t(expr_mf_m3), cell_prop_f_in_common[-1], use = 'pairwise.complete.obs'))
# join tables
mm_filt_gene = mm_filt
mm_filt_gene$ensembl = rownames(mm_filt_gene)
GS_mf_m3$ensembl = rownames(GS_mf_m3)
mm_gs_merged = mm_filt_gene %>% inner_join(GS_mf_m3, by="ensembl")ggplot(mm_gs_merged, aes(x=AE3, y=Ast.1)) +
geom_point() +
stat_smooth(method = "lm", se=F) +
stat_regline_equation(aes(label = ..adj.rr.label..), show.legend = F) +
easy_labs(x = "Module membership in MF_M3", y = "Gene significance for Ast.1") +
theme_classic()ggplot(mm_gs_merged, aes(x=AE3, y=Ast.2)) +
geom_point() +
stat_smooth(method = "lm", se=F) +
stat_regline_equation(aes(label = ..adj.rr.label..), show.legend = F) +
easy_labs(x = "Module membership in MF_M3", y = "Gene significance for Ast.2") +
theme_classic()ggplot(mm_gs_merged, aes(x=AE3, y=Ast.3)) +
geom_point() +
stat_smooth(method = "lm", se=F) +
stat_regline_equation(aes(label = ..adj.rr.label..), show.legend = F) +
easy_labs(x = "Module membership in MF_M3", y = "Gene significance for Ast.3") +
theme_classic()ggplot(mm_gs_merged, aes(x=AE3, y=Ast.4)) +
geom_point() +
stat_smooth(method = "lm", se=F) +
stat_regline_equation(aes(label = ..adj.rr.label..), show.legend = F) +
easy_labs(x = "Module membership in MF_M3", y = "Gene significance for Ast.4") +
theme_classic()ggplot(mm_gs_merged, aes(x=AE3, y=Ast.5)) +
geom_point() +
stat_smooth(method = "lm", se=F) +
stat_regline_equation(aes(label = ..adj.rr.label..), show.legend = F) +
easy_labs(x = "Module membership in MF_M3", y = "Gene significance for Ast.5") +
theme_classic()ggplot(mm_gs_merged, aes(x=AE3, y=Ast.6)) +
geom_point() +
stat_smooth(method = "lm", se=F) +
stat_regline_equation(aes(label = ..adj.rr.label..), show.legend = F) +
easy_labs(x = "Module membership in MF_M3", y = "Gene significance for Ast.6") +
theme_classic()ggplot(mm_gs_merged, aes(x=AE3, y=Ast.7)) +
geom_point() +
stat_smooth(method = "lm", se=F) +
stat_regline_equation(aes(label = ..adj.rr.label..), show.legend = F) +
easy_labs(x = "Module membership in MF_M3", y = "Gene significance for Ast.7") +
theme_classic()ggplot(mm_gs_merged, aes(x=AE3, y=Ast.8)) +
geom_point() +
stat_smooth(method = "lm", se=F) +
stat_regline_equation(aes(label = ..adj.rr.label..), show.legend = F) +
easy_labs(x = "Module membership in MF_M3", y = "Gene significance for Ast.8") +
theme_classic()ggplot(mm_gs_merged, aes(x=AE3, y=Ast.9)) +
geom_point() +
stat_smooth(method = "lm", se=F) +
stat_regline_equation(aes(label = ..adj.rr.label..), show.legend = F) +
easy_labs(x = "Module membership in MF_M3", y = "Gene significance for Ast.9") +
theme_classic()ggplot(mm_gs_merged, aes(x=AE3, y=Ast.10)) +
geom_point() +
stat_smooth(method = "lm", se=F) +
stat_regline_equation(aes(label = ..adj.rr.label..), show.legend = F) +
easy_labs(x = "Module membership in MF_M3", y = "Gene significance for Ast.10") +
theme_classic()ggplot(mm_gs_merged, aes(x=AE3, y=Mic.1)) +
geom_point() +
stat_smooth(method = "lm", se=F) +
stat_regline_equation(aes(label = ..adj.rr.label..), show.legend = F) +
easy_labs(x = "Module membership in MF_M3", y = "Gene significance for Mic.1") +
theme_classic()ggplot(mm_gs_merged, aes(x=AE3, y=Mic.2)) +
geom_point() +
stat_smooth(method = "lm", se=F) +
stat_regline_equation(aes(label = ..adj.rr.label..), show.legend = F) +
easy_labs(x = "Module membership in MF_M3", y = "Gene significance for Mic.2") +
theme_classic()ggplot(mm_gs_merged, aes(x=AE3, y=Mic.3)) +
geom_point() +
stat_smooth(method = "lm", se=F) +
stat_regline_equation(aes(label = ..adj.rr.label..), show.legend = F) +
easy_labs(x = "Module membership in MF_M3", y = "Gene significance for Mic.3") +
theme_classic()ggplot(mm_gs_merged, aes(x=AE3, y=Mic.4)) +
geom_point() +
stat_smooth(method = "lm", se=F) +
stat_regline_equation(aes(label = ..adj.rr.label..), show.legend = F) +
easy_labs(x = "Module membership in MF_M3", y = "Gene significance for Mic.4") +
theme_classic()ggplot(mm_gs_merged, aes(x=AE3, y=Mic.5)) +
geom_point() +
stat_smooth(method = "lm", se=F) +
stat_regline_equation(aes(label = ..adj.rr.label..), show.legend = F) +
easy_labs(x = "Module membership in MF_M3", y = "Gene significance for Mic.5") +
theme_classic()ggplot(mm_gs_merged, aes(x=AE3, y=Mic.6)) +
geom_point() +
stat_smooth(method = "lm", se=F) +
stat_regline_equation(aes(label = ..adj.rr.label..), show.legend = F) +
easy_labs(x = "Module membership in MF_M3", y = "Gene significance for Mic.6") +
theme_classic()ggplot(mm_gs_merged, aes(x=AE3, y=Mic.7)) +
geom_point() +
stat_smooth(method = "lm", se=F) +
stat_regline_equation(aes(label = ..adj.rr.label..), show.legend = F) +
easy_labs(x = "Module membership in MF_M3", y = "Gene significance for Mic.7") +
theme_classic()ggplot(mm_gs_merged, aes(x=AE3, y=Mic.8)) +
geom_point() +
stat_smooth(method = "lm", se=F) +
stat_regline_equation(aes(label = ..adj.rr.label..), show.legend = F) +
easy_labs(x = "Module membership in MF_M3", y = "Gene significance for Mic.8") +
theme_classic()ggplot(mm_gs_merged, aes(x=AE3, y=Mic.9)) +
geom_point() +
stat_smooth(method = "lm", se=F) +
stat_regline_equation(aes(label = ..adj.rr.label..), show.legend = F) +
easy_labs(x = "Module membership in MF_M3", y = "Gene significance for Mic.9") +
theme_classic()ggplot(mm_gs_merged, aes(x=AE3, y=Mic.10)) +
geom_point() +
stat_smooth(method = "lm", se=F) +
stat_regline_equation(aes(label = ..adj.rr.label..), show.legend = F) +
easy_labs(x = "Module membership in MF_M3", y = "Gene significance for Mic.10") +
theme_classic()ggplot(mm_gs_merged, aes(x=AE3, y=Mic.11)) +
geom_point() +
stat_smooth(method = "lm", se=F) +
stat_regline_equation(aes(label = ..adj.rr.label..), show.legend = F) +
easy_labs(x = "Module membership in MF_M3", y = "Gene significance for Mic.11") +
theme_classic()ggplot(mm_gs_merged, aes(x=AE3, y=Mic.12)) +
geom_point() +
stat_smooth(method = "lm", se=F) +
stat_regline_equation(aes(label = ..adj.rr.label..), show.legend = F) +
easy_labs(x = "Module membership in MF_M3", y = "Gene significance for Mic.12") +
theme_classic()ggplot(mm_gs_merged, aes(x=AE3, y=Mic.13)) +
geom_point() +
stat_smooth(method = "lm", se=F) +
stat_regline_equation(aes(label = ..adj.rr.label..), show.legend = F) +
easy_labs(x = "Module membership in MF_M3", y = "Gene significance for Mic.13") +
theme_classic()ggplot(mm_gs_merged, aes(x=AE3, y=Mic.14)) +
geom_point() +
stat_smooth(method = "lm", se=F) +
stat_regline_equation(aes(label = ..adj.rr.label..), show.legend = F) +
easy_labs(x = "Module membership in MF_M3", y = "Gene significance for Mic.14") +
theme_classic()ggplot(mm_gs_merged, aes(x=AE3, y=Mic.15)) +
geom_point() +
stat_smooth(method = "lm", se=F) +
stat_regline_equation(aes(label = ..adj.rr.label..), show.legend = F) +
easy_labs(x = "Module membership in MF_M3", y = "Gene significance for Mic.15") +
theme_classic()ggplot(mm_gs_merged, aes(x=AE3, y=Mic.16)) +
geom_point() +
stat_smooth(method = "lm", se=F) +
stat_regline_equation(aes(label = ..adj.rr.label..), show.legend = F) +
easy_labs(x = "Module membership in MF_M3", y = "Gene significance for Mic.16") +
theme_classic()sessionInfo()## R version 4.1.2 (2021-11-01)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: CentOS Stream 8
##
## Matrix products: default
## BLAS/LAPACK: /usr/lib64/libopenblasp-r0.3.15.so
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] ggpubr_0.4.0 ggeasy_0.1.3 WGCNA_1.70-3
## [4] fastcluster_1.2.3 dynamicTreeCut_1.63-1 forcats_0.5.1
## [7] stringr_1.4.0 dplyr_1.0.8 purrr_0.3.4
## [10] readr_2.1.2 tidyr_1.2.0 tibble_3.1.6
## [13] ggplot2_3.4.1 tidyverse_1.3.1
##
## loaded via a namespace (and not attached):
## [1] colorspace_2.0-3 ggsignif_0.6.3 ellipsis_0.3.2
## [4] htmlTable_2.4.0 XVector_0.34.0 base64enc_0.1-3
## [7] fs_1.5.2 rstudioapi_0.13 farver_2.1.0
## [10] bit64_4.0.5 AnnotationDbi_1.56.2 fansi_1.0.2
## [13] lubridate_1.8.0 xml2_1.3.3 codetools_0.2-18
## [16] splines_4.1.2 doParallel_1.0.17 cachem_1.0.6
## [19] impute_1.68.0 knitr_1.37 polynom_1.4-0
## [22] Formula_1.2-4 jsonlite_1.7.3 broom_0.7.12
## [25] cluster_2.1.2 GO.db_3.14.0 dbplyr_2.1.1
## [28] png_0.1-7 compiler_4.1.2 httr_1.4.2
## [31] backports_1.4.1 assertthat_0.2.1 Matrix_1.5-1
## [34] fastmap_1.1.0 cli_3.6.1 htmltools_0.5.2
## [37] tools_4.1.2 gtable_0.3.0 glue_1.6.1
## [40] GenomeInfoDbData_1.2.7 Rcpp_1.0.8 carData_3.0-5
## [43] Biobase_2.54.0 cellranger_1.1.0 jquerylib_0.1.4
## [46] vctrs_0.6.1 Biostrings_2.62.0 nlme_3.1-153
## [49] preprocessCore_1.56.0 iterators_1.0.14 xfun_0.29
## [52] rvest_1.0.2 lifecycle_1.0.3 rstatix_0.7.0
## [55] zlibbioc_1.40.0 scales_1.2.1 hms_1.1.1
## [58] parallel_4.1.2 RColorBrewer_1.1-2 yaml_2.3.5
## [61] memoise_2.0.1 gridExtra_2.3 sass_0.4.0
## [64] rpart_4.1-15 latticeExtra_0.6-29 stringi_1.7.6
## [67] RSQLite_2.2.10 highr_0.9 S4Vectors_0.32.3
## [70] foreach_1.5.2 checkmate_2.0.0 BiocGenerics_0.40.0
## [73] GenomeInfoDb_1.30.1 rlang_1.1.0 pkgconfig_2.0.3
## [76] bitops_1.0-7 matrixStats_0.61.0 evaluate_0.15
## [79] lattice_0.20-45 labeling_0.4.2 htmlwidgets_1.5.4
## [82] bit_4.0.4 tidyselect_1.1.2 magrittr_2.0.2
## [85] R6_2.5.1 IRanges_2.28.0 generics_0.1.2
## [88] Hmisc_4.6-0 DBI_1.1.2 mgcv_1.8-38
## [91] pillar_1.7.0 haven_2.4.3 foreign_0.8-81
## [94] withr_2.5.0 abind_1.4-5 survival_3.2-13
## [97] KEGGREST_1.34.0 RCurl_1.98-1.6 nnet_7.3-16
## [100] car_3.0-12 modelr_0.1.8 crayon_1.5.0
## [103] utf8_1.2.2 tzdb_0.2.0 rmarkdown_2.11
## [106] jpeg_0.1-9 grid_4.1.2 readxl_1.3.1
## [109] data.table_1.14.2 blob_1.2.2 reprex_2.0.1
## [112] digest_0.6.29 stats4_4.1.2 munsell_0.5.0
## [115] bslib_0.3.1